from collections import defaultdict
import csv
import os
import sys

#Levenshtein credit:
#https://gist.github.com/mattgathu/5bd17a0e2c1651584fd4c866baacea23	
def levenshtein(a,b):
    """Return the Levenshtein (edit) distance between sequences a and b.

    The distance is the minimum number of single-element insertions,
    deletions and substitutions needed to turn one sequence into the
    other.  Both arguments only need to support len() and indexing, so
    strings and lists are handled alike.
    """
    # Treat the shorter input as the row dimension so that each row
    # holds only min(len(a), len(b)) + 1 cells.
    if len(a) > len(b):
        short, long_ = b, a
    else:
        short, long_ = a, b
    width = len(short)

    # Row 0: cost of building each prefix of `short` from nothing.
    row = list(range(width + 1))
    for i in range(1, len(long_) + 1):
        above = row
        # Column 0: cost of deleting the first i elements of `long_`.
        row = [i]
        for j in range(1, width + 1):
            insertion = above[j] + 1
            deletion = row[j - 1] + 1
            substitution = above[j - 1]
            if short[j - 1] != long_[i - 1]:
                substitution += 1
            row.append(min(insertion, deletion, substitution))

    return row[width]

if __name__ == "__main__":
    # Script entry point.
    #
    # Reads the tab-separated file named on the command line, groups its
    # rows by set id, and writes every row of each "reversed" set to
    # ReversedSets.csv (tab-separated) in the current directory.  A set is
    # "reversed" when the order in which its entries were delivered is
    # (almost) the exact reverse of the order they have after reranking.

    # Zero-based positions of the columns this script reads.
    SET_ID_COL = 1
    DELIVERED_RANK_COL = 3  # Rank Delivered (after shuffling)
    RERANKED_RANK_COL = 6   # Rank after Reranking

    # How much reversedness you want to find.  A small Levenshtein distance
    # between [order delivered] and the reverse of [order after reranking]
    # means closer to reversed; 0 would demand an exact reversal.
    # NOTE: a set with a single row always has a distance of 0 or 1, so it
    # is always reported with this threshold.
    MAX_REVERSED_DISTANCE = 1

    # Usage errors exit with a non-zero status so calling shells and
    # scripts can tell the run failed.
    if len(sys.argv) != 2:
        print("Please pass CSV file to the script as a parameter")
        print("[script].py [csv file]")
        sys.exit(1)
    if not os.path.isfile(sys.argv[1]):
        print("CSV file not found or can't access\n")
        print("[script].py [csv file]")
        sys.exit(1)

    # Dict of <set id, list of rows belonging to that set>
    entries = defaultdict(list)
    with open(sys.argv[1], "r") as source:
        for line in source:
            # Drop the line ending first; otherwise it stays glued to the
            # last column, which breaks the rank comparison whenever that
            # column is the reranked rank and forces the csv writer to
            # quote the field on output.
            line = line.rstrip("\r\n")
            if not line:
                # Blank lines (e.g. a trailing one) carry no entry.
                continue
            row = line.split("\t")
            entries[row[SET_ID_COL]].append(row)

    out = []
    for set_id in entries:
        set_rows = entries[set_id]
        delivered = [row[DELIVERED_RANK_COL] for row in set_rows]
        reranked = [row[RERANKED_RANK_COL] for row in set_rows]
        # Each rank is one token for the edit distance, so the threshold
        # counts misplaced entries rather than characters.  (A stricter
        # alternative would be to require the first three delivered ranks
        # to equal the last three reranked ranks.)
        if levenshtein(delivered, reranked[::-1]) <= MAX_REVERSED_DISTANCE:
            # Use the grouped rows directly: round-tripping the id through
            # int() would raise on non-numeric ids such as a header row.
            out.extend(set_rows)

    with open("ReversedSets.csv", "w") as sink:
        writer = csv.writer(sink, delimiter="\t", lineterminator="\n")
        writer.writerows(out)

